Reading ENIGH microdata (CSV files)

Check Energy Expenses

Check all the energy expenses in the households expenses data set and people expenses data sets are the same than the ones reported in the households condensed data set.

df_hsd_condens$energia is equal to the sum of gastoshogar.gasto_tri if clave is equal to G009-G016, R001, R003 plus gastospersona.gasto_tri if clave is equal to G009-G016, R001, R003.

Code (clave) Description
G009 Liquefied petroleum gas
G010 Petroleum
G011 Diesel
G012 Carbon
G013 Firewood
G014 Fuel to heat
G015 Candles
G016 Other fuels
R001 Electricity
R003 Natural Gas
#df_person_energy
# Create household id, this will be the key attribute in the SQL table
df_hsd_energy$id_household <- paste0(df_hsd_energy$folioviv, df_hsd_energy$foliohog)
df_person_energy$id_household <- paste0(df_person_energy$folioviv, df_person_energy$foliohog)

Merging data sets by id_household using energy expenses related data

df_hsd_energy_by_hsd <- df_hsd_energy %>%
  group_by(id_household, clave) %>% replace(is.na(.), 0) %>% 
  summarise(gasto_tri = sum(gasto_tri))

df_psn_energy_by_hsd <- df_person_energy %>%
  group_by(id_household, clave) %>% replace(is.na(.), 0) %>% 
  summarise(gasto_tri = sum(gasto_tri))
df_energy_by_hsd <- merge(df_hsd_energy_by_hsd, df_psn_energy_by_hsd, 
                          by="id_household", all.y = TRUE, all.x = TRUE) %>% 
  replace(is.na(.), 0)
df_hsd_energy_by_hsd <- as.data.frame(df_hsd_energy_by_hsd)
df_hsd_energy_by_hsd
df_hsd_energy_by_hsd$clave[df_hsd_energy_by_hsd$clave=='G009'] <- 'LPG'
df_hsd_energy_by_hsd$clave[df_hsd_energy_by_hsd$clave=='G010'] <- 'Petroleum'
df_hsd_energy_by_hsd$clave[df_hsd_energy_by_hsd$clave=='G011'] <- 'Diesel'
df_hsd_energy_by_hsd$clave[df_hsd_energy_by_hsd$clave=='G012'] <- 'Carbon'
df_hsd_energy_by_hsd$clave[df_hsd_energy_by_hsd$clave=='G013'] <- 'Firewood'
df_hsd_energy_by_hsd$clave[df_hsd_energy_by_hsd$clave=='G014'] <- 'Fuel_to_heat'
df_hsd_energy_by_hsd$clave[df_hsd_energy_by_hsd$clave=='G015'] <- 'Candles'
df_hsd_energy_by_hsd$clave[df_hsd_energy_by_hsd$clave=='G016'] <- 'Other'
df_hsd_energy_by_hsd$clave[df_hsd_energy_by_hsd$clave=='R001'] <- 'Electricity'
df_hsd_energy_by_hsd$clave[df_hsd_energy_by_hsd$clave=='R003'] <- 'NG'
#df_hsd_energy_by_hsd$clave <-as.factor(df_hsd_energy_by_hsd$clave)
df_hsd_by_hsd_wd <- reshape(df_hsd_energy_by_hsd, v.names = 'gasto_tri',
                            timevar='clave', idvar="id_household", sep = "_",
                            direction="wide")

df_hsd_by_hsd_wd

Descriptive Statistics

descr(df_hsd_by_hsd_wd, stats = c("mean", "sd", "min", "med", "max", "n.valid", "pct.valid"), 
      transpose = TRUE)

Box Plots

pal_plot <- c('grey','red', 'rgb(7,40,89)', 'green')
pal_plot <- setNames(pal_plot, c("elect", "ng", "candles", "lpg"))

#df_hsd_by_hsd_wd[is.na(df_hsd_by_hsd_wd)] = 0

pl_energy_exp_box <- plot_ly(type = 'box') %>%
  add_boxplot(y = df_hsd_by_hsd_wd$gasto_tri_Electricity, 
              boxpoints = 'outliers', 
              name = "Electricity", 
              color = list(color =pal_plot['elect']),
              marker = list(color = pal_plot['elect']), 
              line=list(color = pal_plot['elect']) ) %>%
  add_boxplot(y = df_hsd_by_hsd_wd$gasto_tri_NG, 
              boxpoints = 'outliers', 
              name = "Natural Gas", 
              color = list(color =pal_plot['ng']),
              marker = list(color = pal_plot['ng']), 
              line=list(color = pal_plot['ng']) ) %>%
  add_boxplot(y = df_hsd_by_hsd_wd$gasto_tri_Candles, 
              boxpoints = 'outliers', 
              name = "Candles", 
              color = list(color =pal_plot['candles']),
              marker = list(color = pal_plot['candles']), 
              line=list(color = pal_plot['candles']) ) %>%
  add_boxplot(y = df_hsd_by_hsd_wd$gasto_tri_LPG, 
              boxpoints = 'outliers', 
              name = "LPG", 
              color = list(color =pal_plot['lpg']),
              marker = list(color = pal_plot['lpg']), 
              line=list(color = pal_plot['lpg']) ) %>%
  layout(title = "Boxplots of Expenses by Household per Type of Energy", 
         yaxis = list(title = "$[MXN]", range = c(0, 5000)))

pl_energy_exp_box

plotly_IMAGE(pl_energy_exp_box, format = "png", out_file = "./figs/box_energy_expenses.png" )

Boxplots of energy expenses per household

Analysis of Expenses by Dwelling

Adding factor


subset(df_dwell, select=c('folioviv', 'factor'))

#df_hsd_by_hsd_wd <- merge(df_hsd_by_hsd_wd, df_dwell, 
#                          by="folioviv", all.y = TRUE, all.x = TRUE)